This is the companion R Markdown document to the following presentations that were delivered in Winter 2014-2015:
Adding the TIQ-TEST functions
## Some limitations from not being an R package: Setting the Working directory
## tiq-test.R is loaded with source(), so we first move into its folder.
## setwd() hands back the previous directory, which is kept in `current.dir`.
tiqtest.dir <- file.path("..", "tiq-test")
current.dir <- setwd(tiqtest.dir)
source("tiq-test.R")

## Setting the root data path to where it should be in this repo
## (the "data" folder under the directory we started from)
.tiq.data.setRootPath(file.path(current.dir, "data"))
## INFO [2015-01-28 11:21:37 PST] pid=5826 tiq.data.setRootPath: Setting path to '/Users/alexcp/src/tiq-test-Winter2015/data'
We have roughly 2 months of data available on this public dataset:
# Print the dates reported by tiq.data.getAvailableDates for the "raw"
# category of the "public_outbound" group
print(tiq.data.getAvailableDates("raw", "public_outbound"))
## [1] "20141001" "20141002" "20141003" "20141004" "20141005" "20141006"
## [7] "20141007" "20141008" "20141009" "20141010" "20141011" "20141012"
## [13] "20141013" "20141014" "20141015" "20141016" "20141017" "20141018"
## [19] "20141019" "20141020" "20141021" "20141022" "20141023" "20141024"
## [25] "20141025" "20141026" "20141027" "20141028" "20141029" "20141030"
## [31] "20141031" "20141101" "20141102" "20141103" "20141104" "20141105"
## [37] "20141106" "20141107" "20141108" "20141109" "20141110" "20141111"
## [43] "20141112" "20141113" "20141114" "20141115" "20141116" "20141117"
## [49] "20141118" "20141119" "20141120" "20141121" "20141122" "20141123"
## [55] "20141124" "20141125" "20141126" "20141127" "20141128" "20141129"
## [61] "20141130"
# Print the dates reported by tiq.data.getAvailableDates for the "raw"
# category of the "public_inbound" group
print(tiq.data.getAvailableDates("raw", "public_inbound"))
## [1] "20141001" "20141002" "20141003" "20141004" "20141005" "20141006"
## [7] "20141007" "20141008" "20141009" "20141010" "20141011" "20141012"
## [13] "20141013" "20141014" "20141015" "20141016" "20141017" "20141018"
## [19] "20141019" "20141020" "20141021" "20141022" "20141023" "20141024"
## [25] "20141025" "20141026" "20141027" "20141028" "20141029" "20141030"
## [31] "20141031" "20141101" "20141102" "20141103" "20141104" "20141105"
## [37] "20141106" "20141107" "20141108" "20141109" "20141110" "20141111"
## [43] "20141112" "20141113" "20141114" "20141115" "20141116" "20141117"
## [49] "20141118" "20141119" "20141120" "20141121" "20141122" "20141123"
## [55] "20141124" "20141125" "20141126" "20141127" "20141128" "20141129"
## [61] "20141130"
This time, we also have a couple of private data feeds covering part of this period, but the information in them cannot be shared publicly as part of this release:
# Only list the private1 dates when that dataset is reported as available,
# since the private feeds are not part of the public release
if (tiq.data.isDatasetAvailable("raw", "private1")) {
  print(tiq.data.getAvailableDates("raw", "private1"))
}
## [1] "20141001" "20141002" "20141004" "20141005" "20141006" "20141007"
## [7] "20141008" "20141009" "20141010" "20141011" "20141012" "20141013"
## [13] "20141014" "20141015" "20141016" "20141017" "20141018" "20141019"
## [19] "20141020" "20141021" "20141022" "20141023" "20141024" "20141025"
## [25] "20141026" "20141027" "20141028" "20141029" "20141030" "20141031"
## [31] "20141101" "20141102" "20141103" "20141104" "20141105" "20141106"
## [37] "20141107" "20141108" "20141109" "20141110" "20141111" "20141112"
## [43] "20141113" "20141114" "20141115" "20141116" "20141117" "20141118"
## [49] "20141119" "20141120" "20141121" "20141122" "20141123" "20141124"
## [55] "20141125" "20141126" "20141127" "20141128" "20141129" "20141130"
# Same guard for the second private feed: print its dates only if the
# dataset is reported as available
if (tiq.data.isDatasetAvailable("raw", "private2")) {
  print(tiq.data.getAvailableDates("raw", "private2"))
}
## [1] "20141113" "20141114" "20141115" "20141116" "20141117" "20141118"
## [7] "20141119" "20141120" "20141121" "20141122" "20141123" "20141124"
## [13] "20141125" "20141126" "20141127" "20141128" "20141129" "20141130"
This is an example of “RAW” (not enriched) outbound data imported from combine output
# Load the raw public_outbound indicators for 2014-11-01 ...
outbound.ti <- tiq.data.loadTI("raw", "public_outbound", "20141101")

# ... and show only the entity, type, direction, source and date columns
outbound.ti[, list(entity, type, direction, source, date)]
## entity type direction source date
## 1: 1.168.15.140 IPv4 outbound alienvault 2014-11-01
## 2: 1.93.6.86 IPv4 outbound alienvault 2014-11-01
## 3: 100.42.211.4 IPv4 outbound alienvault 2014-11-01
## 4: 101.227.172.24 IPv4 outbound alienvault 2014-11-01
## 5: 101.36.81.55 IPv4 outbound alienvault 2014-11-01
## ---
## 11388: up.frigo2000.it FQDN outbound zeus 2014-11-01
## 11389: update.odeen.eu FQDN outbound zeus 2014-11-01
## 11390: update.rifugiopontese.it FQDN outbound zeus 2014-11-01
## 11391: vahendkarasis4.com FQDN outbound zeus 2014-11-01
## 11392: welcahllyn.com FQDN outbound zeus 2014-11-01
We can use the same loadTI function to also gather the enriched datasets:
# Load the enriched flavour of the same day of public_outbound data
enrich.ti <- tiq.data.loadTI("enriched", "public_outbound", "20141101")

# Drop the `notes` column before printing
# (`:=` with NULL - presumably enrich.ti is a data.table; TODO confirm)
enrich.ti <- enrich.ti[, notes := NULL]

# Show the last rows of the enriched table
tail(enrich.ti)
## entity type direction source date asnumber
## 1: 94.102.63.153 IPv4 outbound zeus 2014-11-01 29073
## 2: 94.103.36.55 IPv4 outbound zeus 2014-11-01 47894
## 3: 95.163.121.12 IPv4 outbound zeus 2014-11-01 12695
## 4: 98.131.185.136 IPv4 outbound zeus 2014-11-01 32392
## 5: 98.131.185.136 IPv4 outbound zeus 2014-11-01 32392
## 6: 99.181.5.83 IPv4 outbound zeus 2014-11-01 7018
## asname country host
## 1: Ecatel Network NL NA
## 2: VeriTeknik Bilisim Ltd. TR NA
## 3: Digital Networks CJSC RU NA
## 4: Ecommerce Corporation US NA
## 5: Ecommerce Corporation US projects.globaltronics.net
## 6: AT&T Services, Inc. US NA
## rhost
## 1: exadomains.net
## 2: datacenter.veriteknik.com
## 3: NA
## 4: NA
## 5: NA
## 6: adsl-99-181-5-83.dsl.irvnca.sbcglobal.net
This specific outbound dataset has the following sources included:
# Reload the raw public_outbound data for 2014-11-01 and list the distinct
# values of its `source` column
outbound.ti <- tiq.data.loadTI("raw", "public_outbound", "20141101")
unique(outbound.ti$source)
## [1] "alienvault" "feodo" "malcode"
## [4] "malcode_zones" "malwaredomainlist" "malwaredomains"
## [7] "malwaregroup" "palevotracker" "spyeye"
## [10] "sslbl" "zeus"
We can do the same for the inbound data we have to see the sources we have available:
# Load the raw public_inbound data for 2014-11-01 and list the distinct
# values of its `source` column
inbound.ti <- tiq.data.loadTI("raw", "public_inbound", "20141101")
unique(inbound.ti$source)
## [1] "alienvault" "autoshun" "blocklistde"
## [4] "botscout" "bruteforceblocker" "charleshaley"
## [7] "ciarmy" "dragonresearch" "dshield"
## [10] "honeypot" "openbl" "packetmail"
## [13] "virbl"
Here are some results of running the Novelty test on the inbound data:
# Novelty test on public_inbound from 20141001 to 20141130, restricted to
# four of the inbound sources
inbound.novelty <- tiq.test.noveltyTest(
  "public_inbound", "20141001", "20141130",
  select.sources = c("alienvault", "blocklistde", "dshield", "charleshaley")
)
##
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|=== | 5%
|
|==== | 7%
|
|===== | 8%
|
|====== | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 18%
|
|============= | 20%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 25%
|
|================= | 27%
|
|================== | 28%
|
|==================== | 30%
|
|===================== | 32%
|
|====================== | 33%
|
|======================= | 35%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 45%
|
|============================== | 47%
|
|=============================== | 48%
|
|================================ | 50%
|
|================================== | 52%
|
|=================================== | 53%
|
|==================================== | 55%
|
|===================================== | 57%
|
|====================================== | 58%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 65%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================== | 70%
|
|=============================================== | 72%
|
|================================================ | 73%
|
|================================================= | 75%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|==================================================== | 80%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 85%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
# Plot the inbound Novelty test result computed above
tiq.test.plotNoveltyTest(inbound.novelty)
And results running on the outbound data:
# Novelty test on public_outbound from 20141001 to 20141130, restricted to
# four of the outbound sources
outbound.novelty <- tiq.test.noveltyTest(
  "public_outbound", "20141001", "20141130",
  select.sources = c("alienvault", "malwaregroup", "malcode", "zeus")
)
##
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|=== | 5%
|
|==== | 7%
|
|===== | 8%
|
|====== | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 18%
|
|============= | 20%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 25%
|
|================= | 27%
|
|================== | 28%
|
|==================== | 30%
|
|===================== | 32%
|
|====================== | 33%
|
|======================= | 35%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 45%
|
|============================== | 47%
|
|=============================== | 48%
|
|================================ | 50%
|
|================================== | 52%
|
|=================================== | 53%
|
|==================================== | 55%
|
|===================================== | 57%
|
|====================================== | 58%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 65%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================== | 70%
|
|=============================================== | 72%
|
|================================================ | 73%
|
|================================================= | 75%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|==================================================== | 80%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 85%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
# Plot the outbound Novelty test result computed above
tiq.test.plotNoveltyTest(outbound.novelty)
This is an example of applying the Overlap Test to our inbound dataset
# Overlap test on the enriched public_inbound data for 20141101;
# select.sources is left as NULL
overlap <- tiq.test.overlapTest(
  "public_inbound", "20141101", "enriched",
  select.sources = NULL
)

# Build the plot object first, then print it explicitly
overlap.plot <- tiq.test.plotOverlapTest(
  overlap,
  title = "Overlap Test - Inbound Data - 20141101"
)
print(overlap.plot)
Similarly, an example applying the Overlap Test to the outbound dataset
# Overlap test on the enriched public_outbound data for 20141101;
# select.sources is left as NULL
overlap <- tiq.test.overlapTest(
  "public_outbound", "20141101", "enriched",
  select.sources = NULL
)

# Build the plot object first, then print it explicitly
overlap.plot <- tiq.test.plotOverlapTest(
  overlap,
  title = "Overlap Test - Outbound Data - 20141101"
)
print(overlap.plot)
With the population data we can generate plots to compare the top quantities of reported IP addresses on a specific date, by country:
# Extract the "country" population from the public outbound and inbound
# feeds for 20141111. split.ti is passed as the literal FALSE rather than
# `F`, because `F` is an ordinary variable that can be reassigned.
outbound.pop <- tiq.test.extractPopulationFromTI(
  "public_outbound", "country",
  date = "20141111",
  select.sources = NULL, split.ti = FALSE
)
inbound.pop <- tiq.test.extractPopulationFromTI(
  "public_inbound", "country",
  date = "20141111",
  select.sources = NULL, split.ti = FALSE
)

# Load the "mmgeo" country population (no date argument is given here)
complete.pop <- tiq.data.loadPopulation("mmgeo", "country")
## Warning in max(tiq.data.getAvailableDates(category, group)): no
## non-missing arguments, returning NA
## WARN [2015-01-28 11:23:14 PST] pid=5826 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/population/mmgeo/NA.csv.gz' is invalid. No data available on date 'NA'.
# Plot the inbound, outbound and mmgeo populations together, by country
tiq.test.plotPopulationBars(
  c(inbound.pop, outbound.pop, complete.pop),
  "country"
)

# Aging test on public_outbound from 20141001 to 20141130, then its plot
outbound.aging <- tiq.test.agingTest("public_outbound", "20141001", "20141130")
tiq.test.plotAgingTest(outbound.aging)

# Same date range for public_inbound
inbound.aging <- tiq.test.agingTest("public_inbound", "20141001", "20141130")
tiq.test.plotAgingTest(inbound.aging)
# Aging test on public_outbound again, this time with split.ti turned off.
# The literal FALSE is used instead of `F`, which is a reassignable variable.
outbound.aging <- tiq.test.agingTest(
  "public_outbound", "20141001", "20141130",
  split.ti = FALSE
)
tiq.test.plotAgingTest(outbound.aging)

# Aging test on the private1 feed over the same date range, also unsplit
private.aging <- tiq.test.agingTest(
  "private1", "20141001", "20141130",
  split.ti = FALSE
)
## WARN [2015-01-28 11:26:55 PST] pid=5826 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/enriched/private1/20141003.csv.gz' is invalid. No data available on date '20141003'.
# Plot the private1 aging result with density.limit set to 0.7
tiq.test.plotAgingTest(private.aging, density.limit = 0.7)

# Extract the "country" population for 20141110 from the public outbound
# feed and from private1. FALSE is written out because `F` can be reassigned.
outbound.pop <- tiq.test.extractPopulationFromTI(
  "public_outbound", "country",
  date = "20141110",
  select.sources = NULL, split.ti = FALSE
)
private.pop <- tiq.test.extractPopulationFromTI(
  "private1", "country",
  date = "20141110",
  select.sources = NULL, split.ti = FALSE
)

# Plot both populations together, by country
tiq.test.plotPopulationBars(
  c(private.pop, outbound.pop), "country",
  title = "Comparing Private1 and Public Feeds on 20141110"
)
# Novelty test on the private1 feed from 20141001 to 20141130.
# NOTE(review): the argument is spelled `split.tii` here but `split.ti` in
# the other calls of this document - confirm against the signature of
# tiq.test.noveltyTest before renaming it.
private.novelty <- tiq.test.noveltyTest(
  "private1", "20141001", "20141130",
  split.tii = FALSE
)
##
|
| | 0%
|
|= | 2%
|
|== | 3%WARN [2015-01-28 11:27:04 PST] pid=5826 tiq.data.loadTI: path '/Users/alexcp/src/tiq-test-Winter2015/data/raw/private1/20141003.csv.gz' is invalid. No data available on date '20141003'.
##
|
|=== | 5%
|
|==== | 7%
|
|===== | 8%
|
|====== | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 18%
|
|============= | 20%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 25%
|
|================= | 27%
|
|================== | 28%
|
|==================== | 30%
|
|===================== | 32%
|
|====================== | 33%
|
|======================= | 35%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 45%
|
|============================== | 47%
|
|=============================== | 48%
|
|================================ | 50%
|
|================================== | 52%
|
|=================================== | 53%
|
|==================================== | 55%
|
|===================================== | 57%
|
|====================================== | 58%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 65%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================== | 70%
|
|=============================================== | 72%
|
|================================================ | 73%
|
|================================================= | 75%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|==================================================== | 80%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 85%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
# Plot the private1 Novelty test result computed above
tiq.test.plotNoveltyTest(private.novelty)
# Novelty test on public_outbound from 20141001 to 20141130.
# NOTE(review): the argument is spelled `split.tii` here but `split.ti` in
# the other calls of this document - confirm against the signature of
# tiq.test.noveltyTest before renaming it.
outbound.novelty <- tiq.test.noveltyTest(
  "public_outbound", "20141001", "20141130",
  split.tii = FALSE
)
##
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|=== | 5%
|
|==== | 7%
|
|===== | 8%
|
|====== | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 18%
|
|============= | 20%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 25%
|
|================= | 27%
|
|================== | 28%
|
|==================== | 30%
|
|===================== | 32%
|
|====================== | 33%
|
|======================= | 35%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 45%
|
|============================== | 47%
|
|=============================== | 48%
|
|================================ | 50%
|
|================================== | 52%
|
|=================================== | 53%
|
|==================================== | 55%
|
|===================================== | 57%
|
|====================================== | 58%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 65%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================== | 70%
|
|=============================================== | 72%
|
|================================================ | 73%
|
|================================================= | 75%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|==================================================== | 80%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 85%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
# Plot the public_outbound Novelty test result computed above
tiq.test.plotNoveltyTest(outbound.novelty)
This is an example of applying the Overlap Test to our inbound dataset
# Overlap test on the enriched public_inbound data for 20141101;
# select.sources is left as NULL
overlap <- tiq.test.overlapTest(
  "public_inbound", "20141101", "enriched",
  select.sources = NULL
)

# Build the plot object first, then print it explicitly
overlap.plot <- tiq.test.plotOverlapTest(
  overlap,
  title = "Overlap Test - Inbound Data - 20141101"
)
print(overlap.plot)
# Overlap test across two groups at once: public_outbound and private1,
# enriched data for 20141101, with split.ti turned off. FALSE is written
# out because `F` is a variable that can be reassigned.
overlap <- tiq.test.overlapTest(
  c("public_outbound", "private1"), "20141101", "enriched",
  split.ti = FALSE, select.sources = NULL
)
tiq.test.plotOverlapTest(
  overlap,
  title = "OVERLAP - public_outbound VS private1 - 20141101"
)

# Same comparison against private2, using 20141115
overlap2 <- tiq.test.overlapTest(
  c("public_outbound", "private2"), "20141115", "enriched",
  split.ti = FALSE, select.sources = NULL
)
tiq.test.plotOverlapTest(
  overlap2,
  title = "OVERLAP - public_outbound VS private2 - 20141115"
)